In [52]:
import mysql.connector
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score,confusion_matrix
In [2]:
con = mysql.connector.connect(
host = "localhost",
user = "root",
password = "Riddhi@2105",
database = "inventory"
)
In [3]:
df = pd.read_sql("SELECT id, name, sku, category, brand, quantity, price, cost_price, profit_margin, warehouse_id, supplier_id, reorder_level, damaged_units, gender, expiry_date, created_at FROM inventory",con);
C:\Users\rp301\AppData\Local\Temp\ipykernel_9912\1314326230.py:1: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.
df = pd.read_sql("SELECT id, name, sku, category, brand, quantity, price, cost_price, profit_margin, warehouse_id, supplier_id, reorder_level, damaged_units, gender, expiry_date, created_at FROM inventory",con);
In [4]:
df.head()
Out[4]:
| id | name | sku | category | brand | quantity | price | cost_price | profit_margin | warehouse_id | supplier_id | reorder_level | damaged_units | gender | expiry_date | created_at | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 0 | SKU-Y9DKK8 | Grocery | SupplyCo | 108 | 2957.93 | 2086.88 | 33.80 | 3 | 12 | 129 | 7 | Female | 2023-01-15 | 2021-02-02 |
| 1 | 2 | 1 | SKU-GOLA8P | Hardware | GlobalTech | 5 | 2283.99 | 1810.70 | 16.56 | 5 | 22 | 49 | 15 | Male | 2023-05-24 | 2020-02-24 |
| 2 | 3 | 2 | SKU-G2ZGSE | Electronics | SupplyCo | 353 | 2494.71 | 1662.74 | 21.84 | 5 | 5 | 17 | 6 | Male | 2022-05-13 | 2022-02-20 |
| 3 | 4 | 3 | SKU-AHJ8SQ | Electronics | SupplyCo | 73 | 343.22 | 2919.88 | 22.15 | 8 | 1 | 122 | 14 | Female | 2021-12-12 | 2020-12-27 |
| 4 | 5 | 4 | SKU-61ANLS | Grocery | SupplyCo | 21 | 3987.21 | 130.33 | 23.87 | 4 | 18 | 81 | 11 | Male | 2021-12-26 | 2022-05-26 |
In [5]:
df.tail()
Out[5]:
| id | name | sku | category | brand | quantity | price | cost_price | profit_margin | warehouse_id | supplier_id | reorder_level | damaged_units | gender | expiry_date | created_at | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1495 | 1496 | 1495 | SKU-BOEAFZ | Grocery | BrandY | 29 | 630.84 | 2597.12 | 15.99 | 4 | 6 | 60 | 13 | Male | 2020-12-12 | 2022-01-30 |
| 1496 | 1497 | 1496 | SKU-GFU912 | Grocery | GlobalTech | 363 | 4665.67 | 2836.87 | 26.49 | 5 | 3 | 84 | 9 | Male | 2021-07-28 | 2023-06-07 |
| 1497 | 1498 | 1497 | SKU-SZIP99 | Stationery | BrandX | 223 | 1222.33 | 2329.88 | 35.30 | 1 | 29 | 64 | 20 | Female | 2021-03-22 | 2020-12-27 |
| 1498 | 1499 | 1498 | SKU-6DFQX3 | Stationery | BrandZ | 35 | 343.03 | 2041.12 | 20.45 | 1 | 16 | 125 | 18 | Female | 2021-01-23 | 2022-07-23 |
| 1499 | 1500 | 1499 | SKU-VKFSOC | Cosmetics | GlobalTech | 166 | 721.46 | 615.17 | 16.20 | 10 | 21 | 109 | 6 | Female | 2020-08-27 | 2023-07-27 |
In [6]:
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1500 entries, 0 to 1499 Data columns (total 16 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 1500 non-null int64 1 name 1500 non-null object 2 sku 1500 non-null object 3 category 1500 non-null object 4 brand 1500 non-null object 5 quantity 1500 non-null int64 6 price 1500 non-null float64 7 cost_price 1500 non-null float64 8 profit_margin 1500 non-null float64 9 warehouse_id 1500 non-null int64 10 supplier_id 1500 non-null int64 11 reorder_level 1500 non-null int64 12 damaged_units 1500 non-null int64 13 gender 1500 non-null object 14 expiry_date 1500 non-null object 15 created_at 1500 non-null object dtypes: float64(3), int64(6), object(7) memory usage: 187.6+ KB
In [7]:
df.isnull().sum()
Out[7]:
id 0 name 0 sku 0 category 0 brand 0 quantity 0 price 0 cost_price 0 profit_margin 0 warehouse_id 0 supplier_id 0 reorder_level 0 damaged_units 0 gender 0 expiry_date 0 created_at 0 dtype: int64
In [8]:
target_encoder = LabelEncoder()
In [9]:
target_encoder
Out[9]:
LabelEncoder()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LabelEncoder()
In [10]:
df["gender"] = target_encoder.fit_transform(df["gender"])
In [11]:
le = LabelEncoder()
le
Out[11]:
LabelEncoder()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LabelEncoder()
In [12]:
df["sku"] = le.fit_transform(df["sku"])
In [13]:
df["category"] = le.fit_transform(df["category"])
In [14]:
df["expiry_date"] = le.fit_transform(df["expiry_date"])
In [15]:
df["created_at"] = le.fit_transform(df["created_at"])
In [16]:
df["brand"] = le.fit_transform(df["brand"])
In [17]:
df["gender"]
Out[17]:
0 0
1 1
2 1
3 0
4 1
..
1495 1
1496 1
1497 0
1498 0
1499 0
Name: gender, Length: 1500, dtype: int32
In [18]:
df["sku"]
Out[18]:
0 1428
1 711
2 683
3 433
4 254
...
1495 483
1496 706
1497 1214
1498 268
1499 1315
Name: sku, Length: 1500, dtype: int32
In [19]:
df["category"]
Out[19]:
0 3
1 4
2 2
3 2
4 3
..
1495 3
1496 3
1497 5
1498 5
1499 1
Name: category, Length: 1500, dtype: int32
In [20]:
df["expiry_date"]
Out[20]:
0 589
1 675
2 424
3 328
4 337
...
1495 95
1496 243
1497 161
1498 124
1499 22
Name: expiry_date, Length: 1500, dtype: int32
In [21]:
df["created_at"]
Out[21]:
0 234
1 29
2 475
3 210
4 538
...
1495 461
1496 781
1497 210
1498 582
1499 811
Name: created_at, Length: 1500, dtype: int32
In [22]:
X = df.drop("gender",axis = 1)
In [23]:
X
Out[23]:
| id | name | sku | category | brand | quantity | price | cost_price | profit_margin | warehouse_id | supplier_id | reorder_level | damaged_units | expiry_date | created_at | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 0 | 1428 | 3 | 4 | 108 | 2957.93 | 2086.88 | 33.80 | 3 | 12 | 129 | 7 | 589 | 234 |
| 1 | 2 | 1 | 711 | 4 | 3 | 5 | 2283.99 | 1810.70 | 16.56 | 5 | 22 | 49 | 15 | 675 | 29 |
| 2 | 3 | 2 | 683 | 2 | 4 | 353 | 2494.71 | 1662.74 | 21.84 | 5 | 5 | 17 | 6 | 424 | 475 |
| 3 | 4 | 3 | 433 | 2 | 4 | 73 | 343.22 | 2919.88 | 22.15 | 8 | 1 | 122 | 14 | 328 | 210 |
| 4 | 5 | 4 | 254 | 3 | 4 | 21 | 3987.21 | 130.33 | 23.87 | 4 | 18 | 81 | 11 | 337 | 538 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1495 | 1496 | 1495 | 483 | 3 | 1 | 29 | 630.84 | 2597.12 | 15.99 | 4 | 6 | 60 | 13 | 95 | 461 |
| 1496 | 1497 | 1496 | 706 | 3 | 3 | 363 | 4665.67 | 2836.87 | 26.49 | 5 | 3 | 84 | 9 | 243 | 781 |
| 1497 | 1498 | 1497 | 1214 | 5 | 0 | 223 | 1222.33 | 2329.88 | 35.30 | 1 | 29 | 64 | 20 | 161 | 210 |
| 1498 | 1499 | 1498 | 268 | 5 | 2 | 35 | 343.03 | 2041.12 | 20.45 | 1 | 16 | 125 | 18 | 124 | 582 |
| 1499 | 1500 | 1499 | 1315 | 1 | 3 | 166 | 721.46 | 615.17 | 16.20 | 10 | 21 | 109 | 6 | 22 | 811 |
1500 rows × 15 columns
In [24]:
y = df["gender"]
In [25]:
y
Out[25]:
0 0
1 1
2 1
3 0
4 1
..
1495 1
1496 1
1497 0
1498 0
1499 0
Name: gender, Length: 1500, dtype: int32
In [26]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.2,random_state = 42)
In [27]:
X_train
Out[27]:
| id | name | sku | category | brand | quantity | price | cost_price | profit_margin | warehouse_id | supplier_id | reorder_level | damaged_units | expiry_date | created_at | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 382 | 383 | 382 | 1033 | 1 | 2 | 450 | 3296.78 | 1331.37 | 38.27 | 6 | 30 | 13 | 3 | 896 | 342 |
| 538 | 539 | 538 | 1311 | 1 | 2 | 296 | 3874.62 | 1428.85 | 27.58 | 6 | 3 | 51 | 10 | 125 | 682 |
| 1493 | 1494 | 1493 | 612 | 0 | 0 | 488 | 4522.45 | 679.71 | 16.78 | 10 | 29 | 47 | 5 | 378 | 255 |
| 1112 | 1113 | 1112 | 771 | 0 | 2 | 272 | 2190.32 | 476.00 | 5.96 | 4 | 25 | 77 | 11 | 13 | 244 |
| 324 | 325 | 324 | 324 | 0 | 4 | 3 | 2354.73 | 224.33 | 12.72 | 2 | 22 | 110 | 15 | 183 | 43 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1130 | 1131 | 1130 | 614 | 0 | 0 | 424 | 3440.37 | 635.41 | 28.80 | 6 | 2 | 94 | 3 | 907 | 545 |
| 1294 | 1295 | 1294 | 625 | 5 | 2 | 464 | 878.77 | 1529.35 | 6.62 | 1 | 9 | 118 | 6 | 43 | 411 |
| 860 | 861 | 860 | 757 | 4 | 2 | 90 | 4104.86 | 2403.59 | 28.93 | 1 | 21 | 137 | 5 | 112 | 60 |
| 1459 | 1460 | 1459 | 44 | 1 | 3 | 338 | 1877.36 | 159.50 | 17.04 | 7 | 30 | 88 | 20 | 664 | 769 |
| 1126 | 1127 | 1126 | 434 | 0 | 4 | 352 | 3317.59 | 1235.46 | 29.87 | 9 | 26 | 28 | 15 | 414 | 589 |
1200 rows × 15 columns
In [28]:
X_test
Out[28]:
| id | name | sku | category | brand | quantity | price | cost_price | profit_margin | warehouse_id | supplier_id | reorder_level | damaged_units | expiry_date | created_at | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1116 | 1117 | 1116 | 1469 | 0 | 1 | 50 | 4848.39 | 566.86 | 15.38 | 10 | 15 | 60 | 19 | 208 | 746 |
| 1368 | 1369 | 1368 | 1270 | 0 | 3 | 382 | 4811.31 | 406.38 | 12.76 | 5 | 24 | 137 | 5 | 552 | 289 |
| 422 | 423 | 422 | 349 | 3 | 3 | 265 | 649.57 | 2519.33 | 16.91 | 2 | 12 | 16 | 18 | 461 | 538 |
| 413 | 414 | 413 | 1267 | 5 | 2 | 366 | 2280.60 | 1193.83 | 25.85 | 10 | 3 | 94 | 9 | 68 | 935 |
| 451 | 452 | 451 | 1078 | 0 | 0 | 267 | 2248.81 | 2770.25 | 7.40 | 8 | 19 | 11 | 19 | 751 | 335 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 983 | 984 | 983 | 1120 | 4 | 3 | 104 | 2727.62 | 664.39 | 26.81 | 8 | 11 | 149 | 7 | 217 | 193 |
| 799 | 800 | 799 | 747 | 2 | 0 | 183 | 907.79 | 742.73 | 9.11 | 8 | 14 | 28 | 12 | 452 | 131 |
| 1265 | 1266 | 1265 | 846 | 0 | 1 | 54 | 1963.80 | 2055.25 | 20.18 | 6 | 26 | 54 | 0 | 130 | 883 |
| 1150 | 1151 | 1150 | 1365 | 0 | 3 | 326 | 86.80 | 1288.89 | 39.20 | 10 | 5 | 66 | 19 | 903 | 680 |
| 824 | 825 | 824 | 1224 | 4 | 4 | 249 | 3039.67 | 1313.87 | 11.10 | 7 | 17 | 104 | 1 | 467 | 303 |
300 rows × 15 columns
In [29]:
y_train
Out[29]:
382 0
538 0
1493 0
1112 0
324 0
..
1130 0
1294 0
860 1
1459 1
1126 1
Name: gender, Length: 1200, dtype: int32
In [30]:
y_test
Out[30]:
1116 1
1368 1
422 1
413 0
451 1
..
983 0
799 1
1265 1
1150 0
824 1
Name: gender, Length: 300, dtype: int32
In [31]:
model = LogisticRegression(max_iter = 2000)
model.fit(X_train,y_train)
C:\Users\rp301\anaconda3\Lib\site-packages\sklearn\linear_model\_logistic.py:469: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
Out[31]:
LogisticRegression(max_iter=2000)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LogisticRegression(max_iter=2000)
In [32]:
import plotly.express as px
In [33]:
px.line(x= df["brand"],y=df["gender"]).show()
In [34]:
import seaborn as sns
In [35]:
px.pie(values=df["quantity"],names=df["category"]).show()
In [51]:
sns.violinplot(data=df["price"])
plt.show()
In [53]:
y_pred = model.predict(X_test)
In [54]:
y_pred
Out[54]:
array([0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1,
1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1,
1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0,
1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1,
1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0,
1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0,
1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,
0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,
0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1])
In [55]:
print(accuracy_score(y_test,y_pred))
0.47333333333333333
In [59]:
cm = confusion_matrix(y_test,y_pred)
In [60]:
cm
Out[60]:
array([[53, 99],
[59, 89]], dtype=int64)
In [92]:
sns.heatmap(cm,annot = True,cmap = "cividis")
plt.xlabel("Acutal")
plt.ylabel("Predict")
plt.title("Acutal Vs Predict")
plt.show()
In [ ]: